Install all packages

SECTION 1: Data management

SECTION 2: Regression analysis

#Appendix A1

#Check dependent variable is normal.
boxplot(childdata2$obesity_rate,main="Boxplot of obesity rate")

#Appendix A2

#Check transformations of dependent variable and see which one will be normal.
symbox(~`obesity_rate`, childdata2, na.rm=T, powers=seq(-3,3,by=.5),main="Types of transformation",ylab="Obesity rate", xlab="Power of transformation")

#Based on this, use square root instead.
childdata2$root_ob<- sqrt(childdata2$obesity_rate)
boxplot(childdata2$root_ob,main="Boxplot of transformed obesity rate")

#Join main dataset and map data

#Appendix Table 1: Univariate regression

#Variable selection.
#Univariate regression first.

model_1 <- lm(root_ob ~ `Density of fast food outlets`, data = modeldata)#significant
summary(model_1)
## 
## Call:
## lm(formula = root_ob ~ `Density of fast food outlets`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.27677 -0.29938 -0.00988  0.29868  1.45546 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    3.6948563  0.0842665   43.85   <2e-16 ***
## `Density of fast food outlets` 0.0127375  0.0009756   13.06   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4625 on 313 degrees of freedom
## Multiple R-squared:  0.3526, Adjusted R-squared:  0.3505 
## F-statistic: 170.5 on 1 and 313 DF,  p-value: < 2.2e-16
model_2 <- lm(root_ob ~ `Active rate`, data = modeldata)#significant
summary(model_2)
## 
## Call:
## lm(formula = root_ob ~ `Active rate`, data = modeldata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.5186 -0.3957 -0.0229  0.4089  1.4139 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     5.8446     0.2481  23.558  < 2e-16 ***
## `Active rate`  -2.3662     0.5207  -4.544 8.38e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5489 on 266 degrees of freedom
##   (47 observations deleted due to missingness)
## Multiple R-squared:  0.07204,    Adjusted R-squared:  0.06855 
## F-statistic: 20.65 on 1 and 266 DF,  p-value: 8.378e-06
model_3 <- lm(root_ob ~ `Percentage of White children`, data = modeldata)#significant
summary(model_3)
## 
## Call:
## lm(formula = root_ob ~ `Percentage of White children`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.33103 -0.35594  0.01714  0.34855  1.23013 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      6.1290     0.1569  39.073   <2e-16 ***
## `Percentage of White children`  -1.6286     0.1809  -9.001   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5123 on 313 degrees of freedom
## Multiple R-squared:  0.2056, Adjusted R-squared:  0.2031 
## F-statistic: 81.02 on 1 and 313 DF,  p-value: < 2.2e-16
model_4 <- lm(root_ob ~ `Percentage of Mixed children`, data = modeldata)#significant
summary(model_4)
## 
## Call:
## lm(formula = root_ob ~ `Percentage of Mixed children`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.42548 -0.41214  0.01901  0.44044  1.19231 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     4.51037    0.05736  78.630  < 2e-16 ***
## `Percentage of Mixed children`  5.98789    1.24793   4.798 2.48e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5547 on 313 degrees of freedom
## Multiple R-squared:  0.06852,    Adjusted R-squared:  0.06554 
## F-statistic: 23.02 on 1 and 313 DF,  p-value: 2.481e-06
model_5 <- lm(root_ob ~ `Percentage of Asian children`, data = modeldata)#significant
summary(model_5)
## 
## Call:
## lm(formula = root_ob ~ `Percentage of Asian children`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.35098 -0.39116  0.00483  0.37521  1.20279 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     4.56331    0.03763 121.253  < 2e-16 ***
## `Percentage of Asian children`  2.50436    0.32550   7.694 1.87e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5271 on 313 degrees of freedom
## Multiple R-squared:  0.159,  Adjusted R-squared:  0.1564 
## F-statistic:  59.2 on 1 and 313 DF,  p-value: 1.874e-13
model_6 <- lm(root_ob ~ `Percentage of African children`, data = modeldata)#significant
summary(model_6)
## 
## Call:
## lm(formula = root_ob ~ `Percentage of African children`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.37075 -0.40101  0.00913  0.40737  1.21564 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       4.61152    0.03298 139.840  < 2e-16 ***
## `Percentage of African children`  4.38738    0.51796   8.471 9.66e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5184 on 313 degrees of freedom
## Multiple R-squared:  0.1865, Adjusted R-squared:  0.1839 
## F-statistic: 71.75 on 1 and 313 DF,  p-value: 9.661e-16
model_7 <- lm(root_ob ~ `IMD income average score`, data = modeldata)#significant
summary(model_7)
## 
## Call:
## lm(formula = root_ob ~ `IMD income average score`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.83874 -0.20839  0.01143  0.19565  0.90327 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 3.53386    0.05152   68.60   <2e-16 ***
## `IMD income average score` 10.38165    0.41299   25.14   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3308 on 313 degrees of freedom
## Multiple R-squared:  0.6688, Adjusted R-squared:  0.6677 
## F-statistic: 631.9 on 1 and 313 DF,  p-value: < 2.2e-16
model_8 <- lm(root_ob ~ `IMD employment average score`, data = modeldata)#significant
summary(model_8)
## 
## Call:
## lm(formula = root_ob ~ `IMD employment average score`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.95236 -0.26248 -0.01322  0.21784  1.14273 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     3.63327    0.06408   56.70   <2e-16 ***
## `IMD employment average score` 12.00710    0.65060   18.46   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3978 on 313 degrees of freedom
## Multiple R-squared:  0.5211, Adjusted R-squared:  0.5196 
## F-statistic: 340.6 on 1 and 313 DF,  p-value: < 2.2e-16
model_9 <- lm(root_ob ~ `IMD education average score`, data = modeldata)#significant
summary(model_9)
## 
## Call:
## lm(formula = root_ob ~ `IMD education average score`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.92965 -0.30765 -0.06097  0.22807  1.38491 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   3.866392   0.064383   60.05   <2e-16 ***
## `IMD education average score` 0.041999   0.002851   14.73   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4417 on 313 degrees of freedom
## Multiple R-squared:  0.4095, Adjusted R-squared:  0.4076 
## F-statistic:   217 on 1 and 313 DF,  p-value: < 2.2e-16
model_10 <- lm(root_ob ~ `IMD health average score`, data = modeldata)#significant
summary(model_10)
## 
## Call:
## lm(formula = root_ob ~ `IMD health average score`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.96863 -0.24078 -0.00531  0.19580  1.36286 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 4.82447    0.02311  208.74   <2e-16 ***
## `IMD health average score`  0.64498    0.03567   18.08   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.402 on 313 degrees of freedom
## Multiple R-squared:  0.5109, Adjusted R-squared:  0.5094 
## F-statistic:   327 on 1 and 313 DF,  p-value: < 2.2e-16
model_11 <- lm(root_ob ~ `IMD crime average score`, data = modeldata)#significant
summary(model_11)                                 
## 
## Call:
## lm(formula = root_ob ~ `IMD crime average score`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.23527 -0.27003 -0.00924  0.28329  1.31520 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                4.83803    0.02583  187.33   <2e-16 ***
## `IMD crime average score`  0.71579    0.04895   14.62   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.443 on 313 degrees of freedom
## Multiple R-squared:  0.4059, Adjusted R-squared:  0.404 
## F-statistic: 213.8 on 1 and 313 DF,  p-value: < 2.2e-16
model_12 <- lm(root_ob ~ `IMD housing average score`, data = modeldata)#insignificant
summary(model_12)                       
## 
## Call:
## lm(formula = root_ob ~ `IMD housing average score`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.46163 -0.46095  0.01413  0.44805  1.42131 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 4.639841   0.118212  39.250   <2e-16 ***
## `IMD housing average score` 0.004669   0.005240   0.891    0.374    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5741 on 313 degrees of freedom
## Multiple R-squared:  0.002531,   Adjusted R-squared:  -0.0006562 
## F-statistic: 0.7941 on 1 and 313 DF,  p-value: 0.3735
model_13 <- lm(root_ob ~ `IMD living average score`, data = modeldata)#significant
summary(model_13) 
## 
## Call:
## lm(formula = root_ob ~ `IMD living average score`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.31999 -0.40801  0.01659  0.39484  1.30631 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                4.317863   0.078347  55.112  < 2e-16 ***
## `IMD living average score` 0.020840   0.003548   5.874 1.09e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5455 on 313 degrees of freedom
## Multiple R-squared:  0.09928,    Adjusted R-squared:  0.09641 
## F-statistic:  34.5 on 1 and 313 DF,  p-value: 1.088e-08
model_14 <- lm(root_ob ~ `IMD IDACI average score`, data = modeldata)#significant
summary(model_14)                           
## 
## Call:
## lm(formula = root_ob ~ `IMD IDACI average score`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.76574 -0.21304 -0.00681  0.20497  0.96490 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                3.52748    0.05158   68.38   <2e-16 ***
## `IMD IDACI average score`  7.91058    0.31361   25.22   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3301 on 313 degrees of freedom
## Multiple R-squared:  0.6703, Adjusted R-squared:  0.6692 
## F-statistic: 636.3 on 1 and 313 DF,  p-value: < 2.2e-16
model_15 <- lm(root_ob ~ `Fairly active rate`, data = modeldata)#insignificant
summary(model_15) 
## 
## Call:
## lm(formula = root_ob ~ `Fairly active rate`, data = modeldata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.4100 -0.4365 -0.0132  0.4096  1.4411 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            4.9832     0.2022  24.641   <2e-16 ***
## `Fairly active rate`  -1.0781     0.8308  -1.298    0.196    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.567 on 265 degrees of freedom
##   (48 observations deleted due to missingness)
## Multiple R-squared:  0.006315,   Adjusted R-squared:  0.002565 
## F-statistic: 1.684 on 1 and 265 DF,  p-value: 0.1955
model_16 <- lm(root_ob ~ `Less active rate`, data = modeldata)#significant
summary(model_16)
## 
## Call:
## lm(formula = root_ob ~ `Less active rate`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.40039 -0.38736 -0.04242  0.38638  1.43481 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          3.7469     0.1612   23.24  < 2e-16 ***
## `Less active rate`   3.3987     0.5473    6.21 2.03e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5325 on 266 degrees of freedom
##   (47 observations deleted due to missingness)
## Multiple R-squared:  0.1266, Adjusted R-squared:  0.1233 
## F-statistic: 38.56 on 1 and 266 DF,  p-value: 2.03e-09

#Check correlation of model variables.

##Figure 2. #visualise the correlation matrix

#visualise the correlation matrix
ggcorrplot(corr, hc.order = TRUE, type = "lower",
           outline.col = "white",tl.cex = 11,
           colors = c("#6D9EC1", "white", "#E46726"))

#Final model selection based on F test, adjusted R etc.

#Final model selection based on F test, adjusted R etc.
model_a <-lm(root_ob ~`Density of fast food outlets`+   `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+   `IMD education average score`+  `IMD health average score`+ `IMD crime average score`+`IMD living average score`+   `IMD IDACI average score`+`Less active rate`, data = modeldata)
summary(model_a)
## 
## Call:
## lm(formula = root_ob ~ `Density of fast food outlets` + `Percentage of Mixed children` + 
##     `Percentage of Asian children` + `Percentage of African children` + 
##     `IMD education average score` + `IMD health average score` + 
##     `IMD crime average score` + `IMD living average score` + 
##     `IMD IDACI average score` + `Less active rate`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.78546 -0.19218  0.00912  0.18340  0.74284 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       4.015791   0.190873  21.039  < 2e-16 ***
## `Density of fast food outlets`   -0.001369   0.001109  -1.235 0.217986    
## `Percentage of Mixed children`   -4.626314   1.398842  -3.307 0.001077 ** 
## `Percentage of Asian children`    1.362550   0.265761   5.127  5.8e-07 ***
## `Percentage of African children`  5.057605   0.589319   8.582  9.0e-16 ***
## `IMD education average score`     0.016403   0.004176   3.928 0.000110 ***
## `IMD health average score`        0.253100   0.072771   3.478 0.000593 ***
## `IMD crime average score`        -0.053869   0.057342  -0.939 0.348392    
## `IMD living average score`       -0.004143   0.002324  -1.783 0.075783 .  
## `IMD IDACI average score`         2.928125   0.897886   3.261 0.001260 ** 
## `Less active rate`                0.306062   0.304036   1.007 0.315043    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2655 on 257 degrees of freedom
##   (47 observations deleted due to missingness)
## Multiple R-squared:  0.7902, Adjusted R-squared:  0.782 
## F-statistic:  96.8 on 10 and 257 DF,  p-value: < 2.2e-16
#Remove less active rate
model_b <-lm(root_ob ~`Density of fast food outlets`+   `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+   `IMD education average score`+  `IMD health average score`+ `IMD crime average score`+`IMD living average score`+   `IMD IDACI average score`, data = modeldata)
summary(model_b)
## 
## Call:
## lm(formula = root_ob ~ `Density of fast food outlets` + `Percentage of Mixed children` + 
##     `Percentage of Asian children` + `Percentage of African children` + 
##     `IMD education average score` + `IMD health average score` + 
##     `IMD crime average score` + `IMD living average score` + 
##     `IMD IDACI average score`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.73966 -0.18434 -0.01111  0.18002  1.23216 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       3.866e+00  1.467e-01  26.345  < 2e-16 ***
## `Density of fast food outlets`   -5.435e-05  9.435e-04  -0.058 0.954099    
## `Percentage of Mixed children`   -3.148e+00  1.348e+00  -2.336 0.020165 *  
## `Percentage of Asian children`    1.171e+00  2.258e-01   5.188 3.89e-07 ***
## `Percentage of African children`  4.485e+00  5.398e-01   8.309 3.21e-15 ***
## `IMD education average score`     1.997e-02  3.791e-03   5.268 2.61e-07 ***
## `IMD health average score`        1.730e-01  6.652e-02   2.601 0.009743 ** 
## `IMD crime average score`        -3.687e-02  5.348e-02  -0.689 0.491088    
## `IMD living average score`       -3.138e-03  2.183e-03  -1.438 0.151476    
## `IMD IDACI average score`         2.936e+00  8.213e-01   3.575 0.000406 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2779 on 305 degrees of freedom
## Multiple R-squared:  0.7722, Adjusted R-squared:  0.7655 
## F-statistic: 114.9 on 9 and 305 DF,  p-value: < 2.2e-16
#Remove fast food shop density
model_c <-lm(root_ob ~  `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+   `IMD education average score`+  `IMD health average score`+ `IMD crime average score`+`IMD living average score`+   `IMD IDACI average score`, data = modeldata)
summary(model_c) 
## 
## Call:
## lm(formula = root_ob ~ `Percentage of Mixed children` + `Percentage of Asian children` + 
##     `Percentage of African children` + `IMD education average score` + 
##     `IMD health average score` + `IMD crime average score` + 
##     `IMD living average score` + `IMD IDACI average score`, data = modeldata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7391 -0.1841 -0.0108  0.1802  1.2335 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       3.862160   0.131781  29.307  < 2e-16 ***
## `Percentage of Mixed children`   -3.157068   1.335894  -2.363 0.018740 *  
## `Percentage of Asian children`    1.170565   0.225049   5.201 3.63e-07 ***
## `Percentage of African children`  4.488435   0.536011   8.374 2.03e-15 ***
## `IMD education average score`     0.020010   0.003727   5.368 1.57e-07 ***
## `IMD health average score`        0.171782   0.062841   2.734 0.006629 ** 
## `IMD crime average score`        -0.037292   0.052893  -0.705 0.481324    
## `IMD living average score`       -0.003164   0.002132  -1.484 0.138755    
## `IMD IDACI average score`         2.930233   0.813056   3.604 0.000366 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2774 on 306 degrees of freedom
## Multiple R-squared:  0.7722, Adjusted R-squared:  0.7663 
## F-statistic: 129.7 on 8 and 306 DF,  p-value: < 2.2e-16
#Remove living average score
model_d <-lm(root_ob ~  `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+   `IMD education average score`+  `IMD health average score`+ `IMD crime average score`+  `IMD IDACI average score`, data = modeldata)
summary(model_d) 
## 
## Call:
## lm(formula = root_ob ~ `Percentage of Mixed children` + `Percentage of Asian children` + 
##     `Percentage of African children` + `IMD education average score` + 
##     `IMD health average score` + `IMD crime average score` + 
##     `IMD IDACI average score`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.71569 -0.18310 -0.00798  0.18331  1.16380 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       3.812813   0.127768  29.842  < 2e-16 ***
## `Percentage of Mixed children`   -3.215825   1.337921  -2.404 0.016827 *  
## `Percentage of Asian children`    1.103218   0.220859   4.995 9.89e-07 ***
## `Percentage of African children`  4.396905   0.533494   8.242 4.99e-15 ***
## `IMD education average score`     0.020879   0.003688   5.661 3.45e-08 ***
## `IMD health average score`        0.156840   0.062151   2.524 0.012122 *  
## `IMD crime average score`        -0.022629   0.052065  -0.435 0.664138    
## `IMD IDACI average score`         2.778762   0.808206   3.438 0.000667 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.278 on 307 degrees of freedom
## Multiple R-squared:  0.7706, Adjusted R-squared:  0.7653 
## F-statistic: 147.3 on 7 and 307 DF,  p-value: < 2.2e-16
#Remove crime average score instead of living average score
model_e <-lm(root_ob ~  `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+   `IMD education average score`+  `IMD health average score`+ `IMD living average score`+ `IMD IDACI average score`, data = modeldata)
summary(model_e) 
## 
## Call:
## lm(formula = root_ob ~ `Percentage of Mixed children` + `Percentage of Asian children` + 
##     `Percentage of African children` + `IMD education average score` + 
##     `IMD health average score` + `IMD living average score` + 
##     `IMD IDACI average score`, data = modeldata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7369 -0.1846 -0.0076  0.1777  1.2215 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       3.904688   0.117068  33.354  < 2e-16 ***
## `Percentage of Mixed children`   -3.420394   1.281570  -2.669 0.008014 ** 
## `Percentage of Asian children`    1.136345   0.219573   5.175 4.12e-07 ***
## `Percentage of African children`  4.526474   0.532851   8.495 8.68e-16 ***
## `IMD education average score`     0.019997   0.003724   5.370 1.56e-07 ***
## `IMD health average score`        0.169717   0.062721   2.706 0.007193 ** 
## `IMD living average score`       -0.002884   0.002093  -1.378 0.169209    
## `IMD IDACI average score`         2.723392   0.757660   3.594 0.000379 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2772 on 307 degrees of freedom
## Multiple R-squared:  0.7718, Adjusted R-squared:  0.7666 
## F-statistic: 148.4 on 7 and 307 DF,  p-value: < 2.2e-16
#Remove crime average score and living average score
model_f <-lm(root_ob ~  `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+   `IMD education average score`+  `IMD health average score` +    `IMD IDACI average score`, data = modeldata)
summary(model_f)
## 
## Call:
## lm(formula = root_ob ~ `Percentage of Mixed children` + `Percentage of Asian children` + 
##     `Percentage of African children` + `IMD education average score` + 
##     `IMD health average score` + `IMD IDACI average score`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.71559 -0.18097 -0.01082  0.18078  1.16015 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       3.842304   0.108118  35.538  < 2e-16 ***
## `Percentage of Mixed children`   -3.378109   1.283071  -2.633 0.008895 ** 
## `Percentage of Asian children`    1.085460   0.216761   5.008 9.29e-07 ***
## `Percentage of African children`  4.425927   0.528602   8.373 2.00e-15 ***
## `IMD education average score`     0.020823   0.003681   5.657 3.52e-08 ***
## `IMD health average score`        0.156375   0.062060   2.520 0.012249 *  
## `IMD IDACI average score`         2.657163   0.757236   3.509 0.000517 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2776 on 308 degrees of freedom
## Multiple R-squared:  0.7704, Adjusted R-squared:  0.766 
## F-statistic: 172.3 on 6 and 308 DF,  p-value: < 2.2e-16

#Stepwise AIC and best subset variable selection (reference only).

# stepwise aic regression
#Run for all variables
ols_step_both_aic(model_a)
## 
## 
##                                           Stepwise Summary                                           
## ---------------------------------------------------------------------------------------------------
## Variable                             Method       AIC       RSS      Sum Sq     R-Sq      Adj. R-Sq 
## ---------------------------------------------------------------------------------------------------
## `IMD IDACI average score`           addition    199.568    34.097    69.312    0.67027      0.66922 
## `Less active rate`                  addition    155.185    27.175    59.176    0.68529      0.68292 
## `Percentage of African children`    addition    117.903    23.470    62.881    0.72820      0.72511 
## `IMD education average score`       addition     96.236    21.486    64.865    0.75117      0.74739 
## `IMD health average score`          addition     83.766    20.357    65.994    0.76425      0.75975 
## `Percentage of Asian children`      addition     74.845    19.544    66.807    0.77367      0.76846 
## `Percentage of Mixed children`      addition     63.230    18.576    67.775    0.78488      0.77908 
## `IMD living average score`          addition     61.559    18.323    68.028    0.78780      0.78125 
## `Density of fast food outlets`      addition     61.427    18.178    68.173    0.78948      0.78214 
## ---------------------------------------------------------------------------------------------------
k <- ols_step_both_aic(model_a)
ols_step_both_aic(model_a, details = FALSE)
## 
## 
##                                           Stepwise Summary                                           
## ---------------------------------------------------------------------------------------------------
## Variable                             Method       AIC       RSS      Sum Sq     R-Sq      Adj. R-Sq 
## ---------------------------------------------------------------------------------------------------
## `IMD IDACI average score`           addition    199.568    34.097    69.312    0.67027      0.66922 
## `Less active rate`                  addition    155.185    27.175    59.176    0.68529      0.68292 
## `Percentage of African children`    addition    117.903    23.470    62.881    0.72820      0.72511 
## `IMD education average score`       addition     96.236    21.486    64.865    0.75117      0.74739 
## `IMD health average score`          addition     83.766    20.357    65.994    0.76425      0.75975 
## `Percentage of Asian children`      addition     74.845    19.544    66.807    0.77367      0.76846 
## `Percentage of Mixed children`      addition     63.230    18.576    67.775    0.78488      0.77908 
## `IMD living average score`          addition     61.559    18.323    68.028    0.78780      0.78125 
## `Density of fast food outlets`      addition     61.427    18.178    68.173    0.78948      0.78214 
## ---------------------------------------------------------------------------------------------------
ols_step_best_subset(model_a)
#Run for model f
ols_step_both_aic(model_f)
## 
## 
##                                           Stepwise Summary                                           
## ---------------------------------------------------------------------------------------------------
## Variable                             Method       AIC       RSS      Sum Sq     R-Sq      Adj. R-Sq 
## ---------------------------------------------------------------------------------------------------
## `IMD IDACI average score`           addition    199.568    34.097    69.312    0.67027      0.66922 
## `Percentage of African children`    addition    161.543    30.028    73.380    0.70962      0.70776 
## `IMD education average score`       addition    123.944    26.481    76.928    0.74392      0.74145 
## `Percentage of Asian children`      addition    109.707    25.150    78.258    0.75679      0.75365 
## `Percentage of Mixed children`      addition     99.939    24.228    79.180    0.76570      0.76191 
## `IMD health average score`          addition     95.512    23.739    79.670    0.77044      0.76596 
## ---------------------------------------------------------------------------------------------------
k <- ols_step_both_aic(model_f)
ols_step_both_aic(model_f, details = FALSE)
## 
## 
##                                           Stepwise Summary                                           
## ---------------------------------------------------------------------------------------------------
## Variable                             Method       AIC       RSS      Sum Sq     R-Sq      Adj. R-Sq 
## ---------------------------------------------------------------------------------------------------
## `IMD IDACI average score`           addition    199.568    34.097    69.312    0.67027      0.66922 
## `Percentage of African children`    addition    161.543    30.028    73.380    0.70962      0.70776 
## `IMD education average score`       addition    123.944    26.481    76.928    0.74392      0.74145 
## `Percentage of Asian children`      addition    109.707    25.150    78.258    0.75679      0.75365 
## `Percentage of Mixed children`      addition     99.939    24.228    79.180    0.76570      0.76191 
## `IMD health average score`          addition     95.512    23.739    79.670    0.77044      0.76596 
## ---------------------------------------------------------------------------------------------------
ols_step_best_subset(model_f)
#save the residuals into dataframe
finalmodel<- lm(root_ob ~   `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+   `IMD education average score`+  `IMD health average score` +    `IMD IDACI average score`, data = modeldata)
summary(finalmodel) 
## 
## Call:
## lm(formula = root_ob ~ `Percentage of Mixed children` + `Percentage of Asian children` + 
##     `Percentage of African children` + `IMD education average score` + 
##     `IMD health average score` + `IMD IDACI average score`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.71559 -0.18097 -0.01082  0.18078  1.16015 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       3.842304   0.108118  35.538  < 2e-16 ***
## `Percentage of Mixed children`   -3.378109   1.283071  -2.633 0.008895 ** 
## `Percentage of Asian children`    1.085460   0.216761   5.008 9.29e-07 ***
## `Percentage of African children`  4.425927   0.528602   8.373 2.00e-15 ***
## `IMD education average score`     0.020823   0.003681   5.657 3.52e-08 ***
## `IMD health average score`        0.156375   0.062060   2.520 0.012249 *  
## `IMD IDACI average score`         2.657163   0.757236   3.509 0.000517 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2776 on 308 degrees of freedom
## Multiple R-squared:  0.7704, Adjusted R-squared:  0.766 
## F-statistic: 172.3 on 6 and 308 DF,  p-value: < 2.2e-16
#Check multicollinearity
vif(finalmodel)
##   `Percentage of Mixed children`   `Percentage of Asian children` 
##                         4.220859                         1.598609 
## `Percentage of African children`    `IMD education average score` 
##                         3.631903                         4.220017 
##       `IMD health average score`        `IMD IDACI average score` 
##                         6.346407                         8.240346
ObesityMAP$`Model residuals`<- finalmodel$residuals

#Figure 3. Diagnostic plots for OLS regression model

#residual plot
qplot(finalmodel$`Model residuals`) + geom_histogram() 

par(mfrow = c(2, 2))  # Split the plotting panel into a 2 x 2 grid
plot(finalmodel)  # Plot the model residuals

#Cross-validation of model

###Cross-validation of model
#####Cross-Validation 
set.seed(48)
regressdata<-modeldata[,c("Percentage of Mixed children","Percentage of Asian children",
                          "Percentage of African children","IMD education average score",
                          "IMD health average score","IMD IDACI average score",
                       "root_ob" )]
model<- train(root_ob ~ `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+   `IMD education average score`+  `IMD health average score` +    `IMD IDACI average score`,
               regressdata,
              method="lm", 
              trControl=trainControl(method="repeatedcv",
                                     number=10,
                                     verboseIter=TRUE))
## + Fold01.Rep1: intercept=TRUE 
## - Fold01.Rep1: intercept=TRUE 
## + Fold02.Rep1: intercept=TRUE 
## - Fold02.Rep1: intercept=TRUE 
## + Fold03.Rep1: intercept=TRUE 
## - Fold03.Rep1: intercept=TRUE 
## + Fold04.Rep1: intercept=TRUE 
## - Fold04.Rep1: intercept=TRUE 
## + Fold05.Rep1: intercept=TRUE 
## - Fold05.Rep1: intercept=TRUE 
## + Fold06.Rep1: intercept=TRUE 
## - Fold06.Rep1: intercept=TRUE 
## + Fold07.Rep1: intercept=TRUE 
## - Fold07.Rep1: intercept=TRUE 
## + Fold08.Rep1: intercept=TRUE 
## - Fold08.Rep1: intercept=TRUE 
## + Fold09.Rep1: intercept=TRUE 
## - Fold09.Rep1: intercept=TRUE 
## + Fold10.Rep1: intercept=TRUE 
## - Fold10.Rep1: intercept=TRUE 
## Aggregating results
## Fitting final model on full training set
summary(model)
## 
## Call:
## lm(formula = .outcome ~ ., data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.71559 -0.18097 -0.01082  0.18078  1.16015 
## 
## Coefficients:
##                                         Estimate Std. Error t value Pr(>|t|)
## (Intercept)                             3.842304   0.108118  35.538  < 2e-16
## `\\`Percentage of Mixed children\\``   -3.378109   1.283071  -2.633 0.008895
## `\\`Percentage of Asian children\\``    1.085460   0.216761   5.008 9.29e-07
## `\\`Percentage of African children\\``  4.425927   0.528602   8.373 2.00e-15
## `\\`IMD education average score\\``     0.020823   0.003681   5.657 3.52e-08
## `\\`IMD health average score\\``        0.156375   0.062060   2.520 0.012249
## `\\`IMD IDACI average score\\``         2.657163   0.757236   3.509 0.000517
##                                           
## (Intercept)                            ***
## `\\`Percentage of Mixed children\\``   ** 
## `\\`Percentage of Asian children\\``   ***
## `\\`Percentage of African children\\`` ***
## `\\`IMD education average score\\``    ***
## `\\`IMD health average score\\``       *  
## `\\`IMD IDACI average score\\``        ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2776 on 308 degrees of freedom
## Multiple R-squared:  0.7704, Adjusted R-squared:  0.766 
## F-statistic: 172.3 on 6 and 308 DF,  p-value: < 2.2e-16

#print R2, MAE and RMSE

#print R2, MAE and RMSE
print(model)
## Linear Regression 
## 
## 315 samples
##   6 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 1 times) 
## Summary of sample sizes: 283, 284, 283, 284, 283, 283, ... 
## Resampling results:
## 
##   RMSE       Rsquared   MAE      
##   0.2795769  0.7716322  0.2217382
## 
## Tuning parameter 'intercept' was held constant at a value of TRUE

#Now check for spatial auto-correlation

ObesityMAP <- as(ObesityMAP,"Spatial")
names(ObesityMAP)
##  [1] "GSS_CODE"                                        
##  [2] "FID"                                             
##  [3] "LAD19NM"                                         
##  [4] "LAD19NMW"                                        
##  [5] "BNG_E"                                           
##  [6] "BNG_N"                                           
##  [7] "LONG"                                            
##  [8] "LAT"                                             
##  [9] "Shape__Area"                                     
## [10] "Shape__Length"                                   
## [11] "obesity_rate"                                    
## [12] "Density.of.fast.food.outlets"                    
## [13] "Active.rate"                                     
## [14] "Percentage.of.White.children"                    
## [15] "Percentage.of.Mixed.children"                    
## [16] "Percentage.of.Asian.children"                    
## [17] "Percentage.of.African.children"                  
## [18] "Income...Average.rank"                           
## [19] "Employment...Average.rank"                       
## [20] "Education..Skills.and.Training...Average.rank"   
## [21] "Health.Deprivation.and.Disability...Average.rank"
## [22] "Crime...Average.rank"                            
## [23] "Barriers.to.Housing.and.Services...Average.rank" 
## [24] "Living.Environment...Average.rank"               
## [25] "IDACI...Average.rank"                            
## [26] "IMD.income.average.score"                        
## [27] "IMD.employment.average.score"                    
## [28] "IMD.education.average.score"                     
## [29] "IMD.health.average.score"                        
## [30] "IMD.crime.average.score"                         
## [31] "IMD.housing.average.score"                       
## [32] "IMD.living.average.score"                        
## [33] "IMD.IDACI.average.score"                         
## [34] "Fairly.active.rate"                              
## [35] "Less.active.rate"                                
## [36] "root_ob"                                         
## [37] "Model.residuals"
#Calculate centriod of each LA.
coordsW <- coordinates(ObesityMAP)
plot(coordsW)

#Neighbours list of queens contiguity
LWard_nb <- poly2nb(ObesityMAP, queen=T)

#and nearest neighbours
knn_wards <- knearneigh(coordsW, k=4)
LWard_knn <- knn2nb(knn_wards)

#plot and add a map
plot(LWard_nb, coordinates(coordsW), col="red")

plot(LWard_knn, coordinates(coordsW), col="blue")

plot(ObesityMAP)

#create a spatial weights matrix 
Lward.queens_weight <- nb2listw(LWard_nb, style="C",zero.policy=TRUE)
Lward.knn_4_weight <- nb2listw(LWard_knn, style="C",zero.policy=TRUE)

#Run Moran’s I using the residuals from our final model #first using queens neighbours

moran.test(ObesityMAP@data$`Model.residuals`, Lward.queens_weight,zero.policy=TRUE)
## 
##  Moran I test under randomisation
## 
## data:  ObesityMAP@data$Model.residuals  
## weights: Lward.queens_weight  n reduced by no-neighbour observations
##   
## 
## Moran I statistic standard deviate = 7.9544, p-value = 9e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.271642496      -0.003194888       0.001193815

#Then knn = 4

moran.test(ObesityMAP@data$`Model.residuals`, Lward.knn_4_weight,zero.policy=TRUE)
## 
##  Moran I test under randomisation
## 
## data:  ObesityMAP@data$Model.residuals  
## weights: Lward.knn_4_weight    
## 
## Moran I statistic standard deviate = 7.8037, p-value = 3.005e-15
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.285285412      -0.003184713       0.001366461

##GWR

#calculate kernel bandwidth
GWRbandwidth <- gwr.sel(root_ob ~   `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+   `IMD education average score`+  `IMD health average score` +    `IMD IDACI average score`,data = modeldata,coords=coordsW,adapt=T)
## Adaptive q: 0.381966 CV score: 22.7907 
## Adaptive q: 0.618034 CV score: 23.71091 
## Adaptive q: 0.236068 CV score: 22.32975 
## Adaptive q: 0.145898 CV score: 22.26535 
## Adaptive q: 0.1565004 CV score: 22.27587 
## Adaptive q: 0.09016994 CV score: 21.82901 
## Adaptive q: 0.05572809 CV score: 21.04139 
## Adaptive q: 0.03444185 CV score: 20.49357 
## Adaptive q: 0.02128624 CV score: 22.45724 
## Adaptive q: 0.04257247 CV score: 20.67178 
## Adaptive q: 0.02941686 CV score: 20.76158 
## Adaptive q: 0.03659131 CV score: 20.55597 
## Adaptive q: 0.03252248 CV score: 20.55079 
## Adaptive q: 0.03451283 CV score: 20.49199 
## Adaptive q: 0.03530674 CV score: 20.49814 
## Adaptive q: 0.03479792 CV score: 20.48601 
## Adaptive q: 0.03499227 CV score: 20.4862 
## Adaptive q: 0.03488435 CV score: 20.48431 
## Adaptive q: 0.03492504 CV score: 20.48376 
## Adaptive q: 0.03492504 CV score: 20.48376
#run the gwr model
gwr.model = gwr(root_ob ~   `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+   `IMD education average score`+  `IMD health average score` +    `IMD IDACI average score`,data = modeldata,coords=coordsW, adapt=GWRbandwidth, hatmatrix=TRUE, se.fit=TRUE)

#print the results of the model
gwr.model
## Call:
## gwr(formula = root_ob ~ `Percentage of Mixed children` + `Percentage of Asian children` + 
##     `Percentage of African children` + `IMD education average score` + 
##     `IMD health average score` + `IMD IDACI average score`, data = modeldata, 
##     coords = coordsW, adapt = GWRbandwidth, hatmatrix = TRUE, 
##     se.fit = TRUE)
## Kernel function: gwr.Gauss 
## Adaptive quantile: 0.03492504 (about 11 of 315 data points)
## Summary of GWR coefficient estimates at data points:
##                                          Min.     1st Qu.      Median
## X.Intercept.                        2.2379325   2.9897034   3.4863385
## X.Percentage.of.Mixed.children.   -22.5861202  -3.5869974  -0.8864863
## X.Percentage.of.Asian.children.    -2.5524665   0.2057791   1.1725359
## X.Percentage.of.African.children.  -8.5687184   2.1280087   3.3279073
## X.IMD.education.average.score.     -0.0072453   0.0185664   0.0262770
## X.IMD.health.average.score.        -0.6729346  -0.1458490  -0.0352223
## X.IMD.IDACI.average.score.         -1.1677115   2.9253575   4.3309386
##                                       3rd Qu.        Max.  Global
## X.Intercept.                        3.9000246   4.5576890  3.8423
## X.Percentage.of.Mixed.children.     1.4186154  14.1164053 -3.3781
## X.Percentage.of.Asian.children.     1.5801901   2.7330463  1.0855
## X.Percentage.of.African.children.   4.1428690  10.3270702  4.4259
## X.IMD.education.average.score.      0.0332300   0.0485405  0.0208
## X.IMD.health.average.score.         0.0863856   0.3934210  0.1564
## X.IMD.IDACI.average.score.          5.3997008   9.6079104  2.6572
## Number of data points: 315 
## Effective number of parameters (residual: 2traceS - traceS'S): 92.88358 
## Effective degrees of freedom (residual: 2traceS - traceS'S): 222.1164 
## Sigma (residual: 2traceS - traceS'S): 0.2308206 
## Effective number of parameters (model: traceS): 68.62676 
## Effective degrees of freedom (model: traceS): 246.3732 
## Sigma (model: traceS): 0.2191634 
## Sigma (ML): 0.1938249 
## AICc (GWR p. 61, eq 2.33; p. 96, eq. 4.21): 39.72662 
## AIC (GWR p. 96, eq. 4.22): -71.14606 
## Residual sum of squares: 11.83395 
## Quasi-global R2: 0.8855612

#Attach coefficients to original dataframe

results<-as.data.frame(gwr.model$SDF)
names(results)
##  [1] "sum.w"                                   
##  [2] "X.Intercept."                            
##  [3] "X.Percentage.of.Mixed.children."         
##  [4] "X.Percentage.of.Asian.children."         
##  [5] "X.Percentage.of.African.children."       
##  [6] "X.IMD.education.average.score."          
##  [7] "X.IMD.health.average.score."             
##  [8] "X.IMD.IDACI.average.score."              
##  [9] "X.Intercept._se"                         
## [10] "X.Percentage.of.Mixed.children._se"      
## [11] "X.Percentage.of.Asian.children._se"      
## [12] "X.Percentage.of.African.children._se"    
## [13] "X.IMD.education.average.score._se"       
## [14] "X.IMD.health.average.score._se"          
## [15] "X.IMD.IDACI.average.score._se"           
## [16] "gwr.e"                                   
## [17] "pred"                                    
## [18] "pred.se"                                 
## [19] "localR2"                                 
## [20] "X.Intercept._se_EDF"                     
## [21] "X.Percentage.of.Mixed.children._se_EDF"  
## [22] "X.Percentage.of.Asian.children._se_EDF"  
## [23] "X.Percentage.of.African.children._se_EDF"
## [24] "X.IMD.education.average.score._se_EDF"   
## [25] "X.IMD.health.average.score._se_EDF"      
## [26] "X.IMD.IDACI.average.score._se_EDF"       
## [27] "pred.se.1"                               
## [28] "coord.x"                                 
## [29] "coord.y"
ObesityMAP@data$coefr2<-results$localR2
ObesityMAP@data$coefmixed<-results$X.Percentage.of.Mixed.children.
ObesityMAP@data$coefafrican<-results$X.Percentage.of.African.children.
ObesityMAP@data$coefedu<-results$X.IMD.education.average.score.
ObesityMAP@data$coefhealth<-results$X.IMD.health.average.score.
ObesityMAP@data$coefasian<-results$X.Percentage.of.Asian.children.
ObesityMAP@data$coefidaci<-results$X.IMD.IDACI.average.score.

Maps

#Local R-squared

tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(ObesityMAP) +
  tm_polygons(col = "coefr2", title = "Local R-squared", breaks = c(-Inf,0.7,0.75,0.8,0.85,0.9,0.95, Inf), palette = "YlGnBu", alpha = 1)+
  tm_format_World(title = NA,legend.width = 0.8, scale = 1,
                  legend.position = c("left", "top"))+tm_scale_bar()+tm_compass()
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(ObesityMAP) +
  tm_polygons(col = "coefafrican", title = "Coefficient for percentage of African children", palette = "RdBu", alpha = 1)+
  tm_format_World(title = NA,legend.width = 0.8, scale = 1,
                  legend.position = c("left", "top"))+tm_scale_bar()+tm_compass()
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
## Variable "coefafrican" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

#Percentage of mixed children

tm_shape(ObesityMAP) +
  tm_polygons(col = "coefmixed", title = "Coefficient for percentage of Mixed children",breaks = c(-Inf,-20,-10,-5,0,5,10,15, Inf),  palette = "RdBu", alpha = 1)+
  tm_format_World(title = NA,legend.width = 0.8, scale = 1,
                legend.position = c("left", "top"))+tm_scale_bar()+tm_compass()
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
## Variable "coefmixed" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

#Percentage of Asian children

tm_shape(ObesityMAP) +
  tm_polygons(col = "coefasian", title = "Coefficient for percentage of Asian children",  palette = "RdBu", alpha = 1)+
  tm_format_World(title = NA,legend.width = 0.8, scale = 1,
                legend.position = c("left", "top"))+tm_scale_bar()+tm_compass()
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
## Variable "coefasian" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

#IDACI score

tm_shape(ObesityMAP) +
  tm_polygons(col = "coefidaci", title = "Coefficient for IMD IDACI average acore", breaks = c(-Inf,-2,0,2,4,6,8, Inf), palette = "RdBu", alpha = 1)+
  tm_format_World(title = NA,legend.width = 0.8, scale = 1,
                  legend.position = c("left", "top"))+tm_scale_bar()+tm_compass()
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
## Variable "coefidaci" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

#IMD Health score

tm_shape(ObesityMAP) +
  tm_polygons(col = "coefhealth", title = "Coefficient for IMD Health average acore",  palette = "RdBu", alpha = 1)+
  tm_format_World(title = NA,legend.width = 0.8, scale = 1,
                  legend.position = c("left", "top"))+tm_scale_bar()+tm_compass()
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
## Variable "coefhealth" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

#IMD education

tm_shape(ObesityMAP) +
  tm_polygons(col = "coefedu", title = "Coefficient for IMD Education average acore", palette = "RdBu", alpha = 1)+
  tm_format_World(title = NA,legend.width = 0.8, scale = 1,
                  legend.position = c("left", "top"))+tm_scale_bar()+tm_compass()
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
## Variable "coefedu" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

SECTION 3: Cluster analysis

SECTION 3: Cluster analysis

#render("Report code.Rmd", output_dir = output_dir, params = list(output_dir = output_dir))
modeldata2<- childdata2[,c("GSS_CODE","root_ob","Percentage of Mixed children","Percentage of Asian children","Percentage of African children",
"IMD education average score","IMD health average score","IMD IDACI average score")]
clusterdata<- childdata2[,c("root_ob","Percentage of Mixed children","Percentage of Asian children","Percentage of African children",
"IMD education average score","IMD health average score","IMD IDACI average score")]
value <- colnames(clusterdata)
# creates a new data frame
stand_data <- clusterdata
for(i in 1: ncol (clusterdata)){
stand_data[, value[i]] <- scale(as.numeric(modeldata2[, value[i]]))
}

#K-means clustering (k=5)

Km <- kmeans(stand_data, 5, nstart = 25, iter.max = 1000)
KmClusters <- as.matrix(Km$cluster)
KmClusters <- as.data.frame(KmClusters)
KmCenters <- as.matrix(Km$centers)
KmCenters <- as.data.frame(KmCenters)
#Number of LAs in each cluster
table(KmClusters)
## KmClusters
##   1   2   3   4   5 
##  80 109  80  21  25

#Validate choice of k

# Total within-cluster sum of square
wss <- NULL
for (i in 1:15) wss[i] <- kmeans(stand_data,centers = i,iter.max = 1000)$tot.withinss
plot(1:15, wss, type = "b", pch = 19, xlab = "Number of Clusters",
ylab = "Total within-cluster sum of squares")

#Cluster plot of the first 2 principal components

library(cluster)
clusplot(stand_data,Km$cluster, color = TRUE, shade = FALSE,
labels = 4, lines = 0, plotchar = FALSE)

#More cluster plot
library(ggplot2)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
fviz_cluster(Km, data = stand_data, geom = "point", ellipse = F, pointsize = 0.5,
ggtheme = theme_classic())

#Radial plot for each group

library(plotrix)
# creates a object of 5 zeros (5 groups)
KmCenters[5,]<- c(0)
par(cex.axis = 0.8, cex.lab = 0.8)
radial.plot(KmCenters[c(1,5),], labels = colnames(KmCenters),
boxed.radial = FALSE, show.radial.grid = TRUE,
line.col = c("blue", "red"), radlab = TRUE,
rp.type = "p", show.grid.labels = 3)

# Creates a map of cluster groups

#Join the cluster labels to the GSS codes
GSScode<- childdata2[,c(1,2)]
Classification <- as.data.frame(cbind(as.character(GSScode[,1]), KmClusters[,1]))
names(Classification) <- c("GSS_CODE", "Classification")

OA.Class<- merge(ObesityMAP, Classification, by.x = "GSS_CODE", by.y = "GSS_CODE",duplicateGeoms = TRUE)

# creates a map in R
tm_shape(OA.Class) +
  tm_polygons(col = "Classification", title = "Cluster groups", palette = "RdBu", alpha = 1)+
  tm_format_World(title = NA,legend.width = 0.8, scale = 1,
                  legend.position = c("left", "top"))+tm_scale_bar()+tm_compass()
## Warning in tm_format_World(title = NA, legend.width = 0.8, scale = 1,
## legend.position = c("left", : tm_format_World is deprecated as of tmap version
## 2.0. Please use tm_format("World", ...) instead
## Compass not supported in view mode.
## Warning: The shape OA.Class is invalid. See sf::st_is_valid
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.